Which resorts will have the lowest maintenance fees in the future? More info about Disney Vacation Club.
import pandas, numpy, seaborn, matplotlib, unicodedata, plotly.graph_objects
import logging, sys, os
from warnings import filterwarnings, resetwarnings
from fbprophet import Prophet
from fbprophet.plot import plot_plotly
import plotly.offline as py
# Set up plotly's offline notebook mode (py is plotly.offline); py.iplot is
# used further down to show the per-resort figures.
py.init_notebook_mode()
def format_accounting(x):
    """Return *x* with three decimals, or '-' when *x* is NaN."""
    if numpy.isnan(x):
        return '-'
    return '%.3f' % x


# Plain formatters handed to Styler.format by the tables below.
format_number = '{:.0f}'.format
format_float = '{:,.3f}'.format
format_dollar = '<span>$</span>{:,.3f}'.format

# Floats in displayed frames use the accounting format, and column text is
# shown without a width limit.
pandas.set_option('display.float_format', format_accounting)
pandas.options.display.max_colwidth = None
# File-format settings. Only field_delimiter is referenced by the visible code
# (the read_csv calls); the remaining values look like options for an external
# export tool -- NOTE(review): confirm whether they are still needed.
file_type = 'CSV'
file_extension = 'txt'
file_compression = 'gzip'
field_delimiter = '\t'
field_escape = '\\\\'
timestamp_format = 'YYYY-MM-DD HH24:MI:SS.FF'
max_file_size = 5 * 1024 ** 3  # 5 GiB expressed in bytes
parallel = 99
# The three input files are read with identical parser settings (delimiter
# taken from field_delimiter, first row as header, windows-1252 encoding), so
# the options are declared once and shared by every read.
tsv_read_options = dict(
    delimiter=field_delimiter,
    header=0,
    keep_default_na=True,
    na_filter=True,
    infer_datetime_format=True,
    memory_map=True,
    encoding='windows-1252',  # 'iso-8859-1' was noted as an alternative
)

dvc_resale_market = pandas.read_csv('dvcresalemarket.txt', **tsv_read_options)
maintenance_fees = pandas.read_csv('maintenance_fees.txt', **tsv_read_options)
years_left = pandas.read_csv('years_left.txt', **tsv_read_options)
# Notebook-style previews of the three loaded frames.
dvc_resale_market
maintenance_fees
years_left
maintenance_fees.info()

# Replace non-breaking spaces (\xa0) with ordinary spaces, in place.
maintenance_fees.replace(to_replace=u'\\xa0', value=u' ', regex=True, inplace=True)

# Sorted list of the distinct resort names; it drives the per-resort loops.
resorts = sorted(maintenance_fees['Resort'].unique())
resorts
# Mean maintenance fee per resort across every year, lowest first.
(
    maintenance_fees
    .groupby(['Resort'], as_index=False)[['Maintenance Fees']]
    .mean()
    .sort_values('Maintenance Fees')
    .reset_index(drop=True)
    .style
    .format({'Maintenance Fees': format_dollar})
    .hide_index()
)

# The same ranking restricted to 2015 and later.
(
    maintenance_fees[maintenance_fees['Year'] >= 2015]
    .groupby(['Resort'], as_index=False)[['Maintenance Fees']]
    .mean()
    .sort_values('Maintenance Fees')
    .reset_index(drop=True)
    .style
    .format({'Maintenance Fees': format_dollar})
    .hide_index()
)
# Year-by-resort grid of fees, shown as a colour-graded table.
# Year is switched to a categorical dtype for the pivot and restored to int
# afterwards.
maintenance_fees['Year'] = maintenance_fees['Year'].astype('category')
(
    pandas.pivot_table(
        maintenance_fees,
        index=['Year'],
        columns=['Resort'],
        values='Maintenance Fees',
        aggfunc=numpy.sum,
    )
    .reset_index()
    .sort_index(ascending=False)
    .style
    .format(format_accounting)
    .format({'Year': format_number})
    .background_gradient(cmap='BuPu')
    .hide_index()
)
maintenance_fees['Year'] = maintenance_fees['Year'].astype('int')
# Fee history for the Polynesian resort only.
# NOTE(review): the 'y' formatter refers to a column that is only added a few
# lines further down -- confirm the intended execution order.
(
    maintenance_fees[maintenance_fees['Resort'] == 'Polynesian']
    .reset_index(drop=True)
    .style
    .format({'Maintenance Fees': format_float, 'y': format_float})
    .hide_index()
)
# Add the two columns handed to Prophet.fit below: 'ds' (datestamp) and 'y'
# (value). Each year becomes 1 January of that year: YYYY * 10000 + 101 is the
# integer YYYYMMDD, parsed with the matching format string.
maintenance_fees['ds'] = pandas.to_datetime(
    maintenance_fees['Year'] * 10000 + 101,
    format='%Y%m%d',
)
maintenance_fees['y'] = maintenance_fees['Maintenance Fees']

# Column rename mapping; not referenced anywhere in the visible code.
fbprophet_columns = {'Maintenance Fees': 'y'}
# Silence logging ahead of the model fits: the root logger is raised to
# CRITICAL and logging.disable(sys.maxsize) suppresses every level globally.
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)
logging.disable(sys.maxsize)
# Fit one Prophet model per resort, plot each forecast, and gather all
# forecasts into a single frame (forecast_frame) tagged with the resort name.
resort_forecasts = []
try:
    for resort in resorts:
        # Train on this resort's (ds, y) history only.
        m = Prophet()
        m.fit(maintenance_fees.loc[maintenance_fees.Resort == resort, ['ds', 'y']])

        # The history is stamped on 1 January of each year, so the horizon is
        # extended with year-start ('YS') dates; 'Y' would produce 31 December
        # dates that are out of step with the training data.
        future = m.make_future_dataframe(periods=22, freq='YS')
        forecast = m.predict(future)
        forecast['resort'] = resort
        resort_forecasts.append(forecast)

        # Static matplotlib figure (this is what the GitHub preview shows).
        # Component plots (m.plot_components) are not relevant for this case.
        fig1 = m.plot(forecast)
        fig1.axes[0].set_title(resort)

        # Interactive plotly figure.
        fig = plot_plotly(m, forecast)
        fig.update_layout(title_text=resort)
        py.iplot(fig)
finally:
    # Re-enable logging even when a fit or plot fails part-way through.
    logging.disable(logging.NOTSET)

# Build the combined frame in one step so every resort appears exactly once.
# The earlier seed-then-append pattern stored the first resort's forecast
# twice, and DataFrame.append no longer exists in current pandas.
if resort_forecasts:
    forecast_frame = pandas.concat(resort_forecasts, ignore_index=True)
else:
    # No resorts to forecast: keep the columns that later steps read.
    forecast_frame = pandas.DataFrame(columns=['ds', 'yhat', 'resort'])
# Preview the combined forecasts, then write them to a CSV file in the
# working directory.
forecast_frame
forecast_frame.to_csv('fbprophet_predictions.csv')
# Line chart of the predicted fee (yhat) over time, one line per resort.
# Style overrides: no grid, no top/right spines, dark-grey axis edges.
seaborn_whitegrid = {
    'axes.grid': False,
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.edgecolor': '.15',
}

# The axes are created inside the style context because these rc settings are
# applied when an axes is created; entering the context after the axes already
# exist leaves them unstyled.
with seaborn.axes_style(seaborn_whitegrid):
    f, axes = matplotlib.pyplot.subplots(1, 1, figsize=(15, 5), sharex=False, sharey=False)
    seaborn.lineplot(x="ds", y="yhat", hue="resort", legend='full',
                     data=forecast_frame, ax=axes)

seaborn.despine(ax=axes)
axes.set_xlabel('Year')
axes.set_ylabel('Maintenance Fees')  # label was previously misspelled
# Mean predicted fee per resort for forecast dates after 2020-01-01,
# lowest first.
(
    forecast_frame[forecast_frame['ds'] > '2020-01-01']
    .rename(columns={'ds': 'date', 'yhat': 'avg predicted fees'})
    .groupby(['resort'], as_index=False)[['avg predicted fees']]
    .mean()
    .sort_values('avg predicted fees')
    .reset_index(drop=True)
    .style
    .format({'avg predicted fees': format_dollar})
    .hide_index()
)
# Combined interactive chart: one predicted-fee line per resort, each labelled
# near the right-hand edge with the resort name and its last predicted fee.
fig = plotly.graph_objects.Figure()
annotations = []

# Pad the x-axis by 5% of the forecast date span on either side.
range_margin = (forecast_frame.ds.max() - forecast_frame.ds.min()) * 0.05

for resort in resorts:
    resort_rows = forecast_frame[forecast_frame.resort == resort]
    forecasted_years = numpy.array(resort_rows['ds'], dtype='datetime64[D]')
    forecasted_fees = numpy.array(resort_rows['yhat'])

    fig.add_trace(
        plotly.graph_objects.Scatter(
            x=forecasted_years,
            y=forecasted_fees,
            mode='lines',
            name=resort,
            line={'width': 1.5},
            connectgaps=True,
        )
    )

    # Label on the right side of the plot: x is in paper coordinates, y is the
    # last predicted value of this resort's line.
    annotations.append({
        'xref': 'paper',
        'x': 0.96,
        'y': forecasted_fees[-1],
        'xanchor': 'left',
        'yanchor': 'middle',
        'text': resort + ' ${:,.2f}'.format(forecasted_fees[-1]),
        'font': {'family': 'Arial', 'size': 7},
        'showarrow': False,
    })

fig.update_layout(
    title='Predicted Fees for Resorts',
    width=900,
    height=800,
    xaxis={
        'type': 'date',
        'linecolor': 'rgb(204, 204, 204)',  # alternatively make axis black
        'linewidth': 1.25,
        'title': 'Year',
        'range': [
            forecast_frame.ds.min() - range_margin,
            forecast_frame.ds.max() + range_margin,
        ],
        'ticks': 'outside',
        'tickfont': {
            'family': 'Arial',
            'size': 12,
            'color': 'rgb(82, 82, 82)',
        },
    },
    yaxis={
        'title': 'Maintenance Fees',
        'linecolor': 'rgb(204, 204, 204)',  # alternatively make axis black
        'linewidth': 1.25,
        'ticks': 'outside',
        'tickfont': {
            'family': 'Arial',
            'size': 12,
            'color': 'rgb(82, 82, 82)',
        },
    },
    autosize=False,
    margin={'autoexpand': False},
    showlegend=False,
    plot_bgcolor='white',
    colorway=plotly.colors.qualitative.Pastel,
)
fig.update_layout(annotations=annotations)
fig.show()
# Fees per resort for 2021.
(
    maintenance_fees[['Resort', 'Year', 'Maintenance Fees']][maintenance_fees['Year'] == 2021]
    .reset_index(drop=True)
    .style
    .format({'Maintenance Fees': format_dollar})
    .hide_index()
)
Copper Creek ($8.70)
Grand Floridian ($10.96)
Polynesian ($12.20)
Ignoring Riviera, which just opened and has only 3 data points.